# Load the preprocessed dataset (presumably Tennessee Eastman Process, given
# the filename — confirm); the CSV's 'Index' column becomes the DataFrame index.
data = pd.read_csv('data/processed/tep_data.csv', index_col='Index')
print(f'Len of dataset: {data.shape[0]}')
Len of dataset: 12801
from src.models.lstm import lstm_model
import keras
# Hyper-parameters for constructing the LSTM model.
# NOTE(review): `window_length`, `X` and `y` are defined in earlier notebook
# cells and must be in scope before this cell runs.
build_params = dict(
input_length=window_length,
input_shape=X.shape[-1],
lstm_layers_size=[2, 2],
dropout_coeff=0.1,
# `lr` is the pre-Keras-2.3 keyword; newer Keras spells it `learning_rate`.
optimizer=keras.optimizers.Adam(lr=0.01),
reg_strength=0.005,
)
# Arguments forwarded to `model.fit` during cross-validation.
fit_params = dict(
batch_size=64,
epochs=10,
verbose=1,
callbacks=[
# Both callbacks monitor `val_loss` by default, so they only take effect
# when validation data is present (true inside cross-validation).
keras.callbacks.ReduceLROnPlateau(patience=3),
keras.callbacks.EarlyStopping(min_delta=0.01, patience=5)
]
)
from src.utils import cross_validate
# 3-fold cross-validation; per the logged output the training set grows each
# fold (time-series-style split) — confirm in `src.utils.cross_validate`.
results = cross_validate(lambda: lstm_model(**build_params), 3, fit_params, X, y)
Fold 0... WARNING:tensorflow:From C:\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead. WARNING:tensorflow:From C:\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead. WARNING:tensorflow:From C:\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead. WARNING:tensorflow:From C:\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead. WARNING:tensorflow:From C:\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`. WARNING:tensorflow:From C:\Anaconda3\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead. WARNING:tensorflow:From C:\Anaconda3\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.where in 2.0, which has the same broadcast rule as np.where WARNING:tensorflow:From C:\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead. 
Train on 3186 samples, validate on 3183 samples Epoch 1/10 3186/3186 [==============================] - 6s 2ms/step - loss: 0.5834 - val_loss: 0.4676 Epoch 2/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3928 - val_loss: 0.3842 Epoch 3/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3622 - val_loss: 0.3694 Epoch 4/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3558 - val_loss: 0.3675 Epoch 5/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3518 - val_loss: 0.3601 Epoch 6/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3482 - val_loss: 0.3575 Epoch 7/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3463 - val_loss: 0.3520 Epoch 8/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3442 - val_loss: 0.3518 Epoch 9/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3428 - val_loss: 0.3516 Epoch 10/10 3186/3186 [==============================] - 4s 1ms/step - loss: 0.3413 - val_loss: 0.3491 Fold 1... Train on 6369 samples, validate on 3183 samples Epoch 1/10 6369/6369 [==============================] - 9s 1ms/step - loss: 0.4489 - val_loss: 0.3344 Epoch 2/10 6369/6369 [==============================] - 7s 1ms/step - loss: 0.3476 - val_loss: 0.3260 Epoch 3/10 6369/6369 [==============================] - 7s 1ms/step - loss: 0.3425 - val_loss: 0.3220 Epoch 4/10 6369/6369 [==============================] - 7s 1ms/step - loss: 0.3390 - val_loss: 0.3208 Epoch 5/10 6369/6369 [==============================] - 7s 1ms/step - loss: 0.3371 - val_loss: 0.3175 Epoch 6/10 6369/6369 [==============================] - 7s 1ms/step - loss: 0.3352 - val_loss: 0.3166 Epoch 7/10 6369/6369 [==============================] - 7s 1ms/step - loss: 0.3342 - val_loss: 0.3169 Epoch 8/10 6369/6369 [==============================] - 7s 1ms/step - loss: 0.3326 - val_loss: 0.3144 Fold 2... 
Train on 9552 samples, validate on 3183 samples Epoch 1/10 9552/9552 [==============================] - 12s 1ms/step - loss: 0.4034 - val_loss: 0.3492 Epoch 2/10 9552/9552 [==============================] - 10s 1ms/step - loss: 0.3373 - val_loss: 0.3439 Epoch 3/10 9552/9552 [==============================] - 10s 1ms/step - loss: 0.3311 - val_loss: 0.3351 Epoch 4/10 9552/9552 [==============================] - 10s 1ms/step - loss: 0.3269 - val_loss: 0.3327 Epoch 5/10 9552/9552 [==============================] - 10s 1ms/step - loss: 0.3253 - val_loss: 0.3307 Epoch 6/10 9552/9552 [==============================] - 10s 1ms/step - loss: 0.3243 - val_loss: 0.3314 Epoch 7/10 9552/9552 [==============================] - 10s 1ms/step - loss: 0.3240 - val_loss: 0.3293 Epoch 8/10 9552/9552 [==============================] - 10s 1ms/step - loss: 0.3240 - val_loss: 0.3309
from src.visualization.visualize import visualize_cv_result
# Plot the per-fold cross-validation curves for this window length.
visualize_cv_result(results, f'Window length = {window_length}')
# Train the final model on the full dataset.
model = lstm_model(**build_params)
# Fix: the shared `fit_params` callbacks monitor `val_loss`, but this fit has
# no validation data, so they only emitted "metric not available"
# RuntimeWarnings and never fired (see the warnings in the cell output).
# Re-point them at the training `loss` so LR reduction and early stopping
# actually work during the final fit.
final_fit_params = dict(
    fit_params,
    callbacks=[
        keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=3),
        keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.01,
                                      patience=5),
    ],
)
model.fit(X, y, **final_fit_params)
Epoch 1/10 12735/12735 [==============================] - 16s 1ms/step - loss: 0.4269 Epoch 2/10 128/12735 [..............................] - ETA: 14s - loss: 0.3672
/home/elch10/miniconda3/lib/python3.6/site-packages/keras/callbacks.py:1109: RuntimeWarning: Reduce LR on plateau conditioned on metric `val_loss` which is not available. Available metrics are: loss,lr (self.monitor, ','.join(list(logs.keys()))), RuntimeWarning /home/elch10/miniconda3/lib/python3.6/site-packages/keras/callbacks.py:569: RuntimeWarning: Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: loss,lr (self.monitor, ','.join(list(logs.keys()))), RuntimeWarning
12735/12735 [==============================] - 15s 1ms/step - loss: 0.3535 Epoch 3/10 12735/12735 [==============================] - 15s 1ms/step - loss: 0.3432 Epoch 4/10 12735/12735 [==============================] - 15s 1ms/step - loss: 0.3375 Epoch 5/10 12735/12735 [==============================] - 15s 1ms/step - loss: 0.3337 Epoch 6/10 12735/12735 [==============================] - 15s 1ms/step - loss: 0.3315 Epoch 7/10 12735/12735 [==============================] - 15s 1ms/step - loss: 0.3302 Epoch 8/10 12735/12735 [==============================] - 15s 1ms/step - loss: 0.3289 Epoch 9/10 12735/12735 [==============================] - 15s 1ms/step - loss: 0.3280 Epoch 10/10 12735/12735 [==============================] - 15s 1ms/step - loss: 0.3274
<keras.callbacks.History at 0x7f74969cc8d0>
# Recall-vs-threshold curves for generated anomalies and for normal data.
# The optimum threshold is where the two recall curves are closest.
recall_gap = np.abs(np.array(anom_recalls) - np.array(norm_recalls))
optimum = anom_treshs[np.argmin(recall_gap)]

plt.figure(figsize=(8, 4))
plt.plot(anom_treshs, anom_recalls, label='Только сгенерированные аномалии')
plt.plot(anom_treshs, norm_recalls, label='Только нормальные данные')
plt.vlines(optimum, 0, 1, linestyles='dashed', label='Оптимум')
plt.title(f'Оптимум - {optimum:.2f}')
plt.xlabel('Порог')
plt.ylabel('Доля корректных')
plt.legend()
<matplotlib.legend.Legend at 0x1d1345cfb70>
Выберем порог, который получили в прошлый раз
from sklearn.metrics import recall_score, precision_score

# Evaluate detection quality at the threshold chosen above.
tresh = optimum
# Fix: compare against `tresh` consistently instead of mixing the `tresh`
# and `optimum` aliases (they hold the same value, but mixing them invites
# bugs if the threshold is later set independently of `optimum`).
# NOTE(review): `diff` is presumably the per-window anomaly score computed
# in an earlier cell — confirm.
anoms_pred = np.where(diff >= tresh)[0]
recall = intersection_over_true(data.shape[0], anom_idxs_start, anom_lens,
                                anoms_pred, window_length, recall_score)
precision = intersection_over_true(data.shape[0], anom_idxs_start, anom_lens,
                                   anoms_pred, window_length, precision_score)
print(f'Полнота={recall:.3f}, Точность={precision:.3f}, Порог={tresh:.2f}')
Полнота=0.940, Точность=0.579, Порог=26.40
# Blue - anomalies predicted by the model
# Red - anomalies that were not caught by the model
# Green - original data that is neither an anomaly nor predicted as one
show(p)